# Copyright 2025 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Integration test for the compile commands database merger.

This script has dependencies on compile command fragments, and uses the
merger to create compile commands databases. Since the collected compile
commands won't be deterministic (if the build directory has trailing compile
commands fragments from prior builds), compile commands correctness checks
are filtered by a regex of known file patterns that should be validated.

Because this does nested `bazel` calls under the hood, and because the merger
depends on BUILD_WORKSPACE_DIRECTORY, this test must be `bazel run` rather
than `bazel test`.
"""

import json
import os
from pathlib import Path
import re
import shlex
import subprocess
import tempfile
import unittest

from python.runfiles import runfiles  # type: ignore

# These are generated by the pw_py_importable_runfile rules in the BUILD.bazel
# file.
from pw_ide import clangd_binary, update_compile_commands_binary


# Compiler flags that introduce an include search directory. They may be
# followed by the directory as a separate argument or have it attached.
_INCLUDE_PREFIXES = ('-I', '-isystem', '-iquote')

# Label handed to the updater when it forwards a `bazel build` invocation.
# pylint: disable=line-too-long
_TEST_CPP_TARGET = (
    '//pw_ide/bazel/compile_commands/test:test_compile_commands_outputs'
)
# pylint: enable=line-too-long


def _get_host_platform() -> str:
    """Searches the CWD to determine the current execution platform name.

    The output structure is shaped something like this:

        execroot/_main/bazel-out/darwin_arm64-fastbuild/bin/pw_ide/...
                                 ~~~~~~~~~~~~~~~~~~~~~~

    So we can find the "host" platform name after the `bazel-out` bit. This
    is a little hacky, but BUILD_EXECROOT should give us more direct access
    to this information in Bazel 9.0.0, so for now it's good enough.

    Returns:
        The path component that directly follows `bazel-out` in the CWD.

    Raises:
        ValueError: If the CWD has no `bazel-out` component, or if
            `bazel-out` is the final component of the CWD.
    """
    cwd = Path.cwd()
    cwd_parts = cwd.parts
    try:
        return cwd_parts[cwd_parts.index('bazel-out') + 1]
    except (ValueError, IndexError) as err:
        # This runs at import time, so spell out what went wrong instead of
        # surfacing a bare "x not in tuple" error.
        raise ValueError(
            f'Cannot determine the host platform: `{cwd}` has no path '
            'component after `bazel-out`. This test must be started with '
            '`bazel run`.'
        ) from err


# Name of the `bazel-out` subdirectory this test is running from.
_HOST_PLATFORM = _get_host_platform()

# The device platform reuses the host build's suffix (e.g. fastbuild).
_DEVICE_PLATFORM = f"rp2040-{_HOST_PLATFORM.split('-')[1]}"

# Regex alternation that accepts either the host or the device platform.
_HOST_OR_DEVICE = '|'.join(
    f'({platform})' for platform in (_HOST_PLATFORM, _DEVICE_PLATFORM)
)

# Regex prefix for tested rules that are internal to @pigweed.
_TEST_PACKAGE = r'.*pw_ide/bazel/compile_commands/test/'

# Regex prefix for tested rules that are hosted in an external repo.
_EXTERNAL_PACKAGE = r'.*pw_cc_compile_commands_test_external/'

# Regex prefix for anything in the external or local test packages.
_ANY_TEST_PACKAGE = f'({_TEST_PACKAGE}|{_EXTERNAL_PACKAGE})'


def _format_clangd_error(
    clangd_result: subprocess.CompletedProcess,
    db_path: Path,
    command: dict,
) -> str:
    """Builds a multi-line report describing a `clangd --check` run.

    Args:
        clangd_result: The completed clangd process; its `args`, `stdout`
            and `stderr` are included in the report.
        db_path: Path that identifies the database in the report header.
        command: The compile command entry that was being checked.

    Returns:
        The report lines joined with newlines.
    """
    invocation = shlex.join(clangd_result.args)
    report = [
        f'clangd --check in {db_path} failed:',
        f'ENTRY: {command}',
        f'CMD: {invocation}',
        f'STDOUT:\n{clangd_result.stdout}',
        f'STDERR:\n{clangd_result.stderr}',
    ]
    return '\n'.join(report)


class CompileCommandsTestBase(unittest.TestCase):
    """A base class with tests for compile commands integration."""

    _all_compile_commands: dict[Path, list]
    temp_dir: tempfile.TemporaryDirectory
    clangd_path: Path
    project_root: Path

    @classmethod
    def setUpClass(cls):
        if cls is CompileCommandsTestBase:
            raise unittest.SkipTest('Tests are skipped on this base class')

        cls.runfiles = runfiles.Create()
        cls.clangd_path = cls.runfiles.Rlocation(*clangd_binary.RLOCATION)
        cls.updater_path = cls.runfiles.Rlocation(
            *update_compile_commands_binary.RLOCATION
        )
        assert 'BUILD_WORKSPACE_DIRECTORY' in os.environ, (
            'This must be `bazel run` to work properly, and cannot be tested '
            'via `bazel test`'
        )
        cls.project_root = os.environ.get('BUILD_WORKSPACE_DIRECTORY')
        cls.temp_dir = tempfile.TemporaryDirectory()
        cls._all_compile_commands = {}

    @classmethod
    def _load_databases(cls):
        """Loads all compile command databases from a directory."""
        compile_db_paths = list(
            Path(cls.temp_dir.name).rglob('compile_commands.json')
        )
        if not compile_db_paths:
            raise RuntimeError('No compile_commands.json files found')

        for db_path in compile_db_paths:
            # TODO: https://pwbug.dev/444224547 - This fails on gcc builds
            # because `-fno-canonical-system-headers` is unknown.
            if 'stm32f429i' in str(db_path):
                continue

            with open(db_path, 'r') as f:
                compile_commands = json.load(f)

            if isinstance(compile_commands, list) and compile_commands:
                cls._all_compile_commands[db_path] = compile_commands

    def _find_commands_for_file(
        self, file_pattern: str, platform_pattern: str | None = None
    ) -> list[tuple[Path, dict]]:
        """Finds all compile commands for a file matching a pattern."""
        matches = []
        for db_path, commands in self._all_compile_commands.items():
            db_platform = Path(db_path).parent.parts[-1]
            if platform_pattern and not re.match(platform_pattern, db_platform):
                continue
            for command in commands:
                if re.match(file_pattern, command['file']):
                    matches.append((db_path, command))
        return matches

    def _run_clangd_check(
        self,
        db_path: Path,
        command: dict,
    ) -> subprocess.CompletedProcess:
        """Run clangd --check on a given file."""
        file_path = command['file']
        return subprocess.run(
            [
                self.clangd_path,
                f'--compile-commands-dir={db_path.parent}',
                f'--check={file_path}',
            ],
            capture_output=True,
            text=True,
            check=False,
            # The compile commands need to run from the project root
            # for relative path resolution to work.
            cwd=self.project_root,
        )

    def _assert_file_is_in_db_for_config(
        self, file_pattern: str, platform_pattern: str
    ):
        """Asserts that a file is in the DB for a given config."""
        matches = self._find_commands_for_file(
            file_pattern, platform_pattern=platform_pattern
        )
        self.assertGreater(
            len(matches),
            0,
            f'No command found for "{file_pattern}" in config '
            f'"{platform_pattern}"',
        )

    def test_files_are_valid(self):
        """Checks various file's compile command with clangd."""
        matches = self._find_commands_for_file(
            _TEST_PACKAGE + r'.*\.cc?',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertGreater(
            len(matches),
            0,
            'Test files missing from compile command databases.',
        )

        for db_path, command in matches:
            with self.subTest(
                f'Checking {command["file"]} from {db_path.parent}'
            ):
                clangd_result = self._run_clangd_check(db_path, command)
                self.assertEqual(
                    clangd_result.returncode,
                    0,
                    _format_clangd_error(clangd_result, db_path, command),
                )

    def test_external_file_is_valid(self):
        """Checks an external file's compile command with clangd."""
        matches = self._find_commands_for_file(
            _EXTERNAL_PACKAGE + r'.*\.cc?',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertGreater(
            len(matches), 0, 'External test file command not found.'
        )

        for db_path, command in matches:
            with self.subTest(
                f'Checking {command["file"]} from {db_path.parent}'
            ):
                clangd_result = self._run_clangd_check(db_path, command)
                self.assertEqual(
                    clangd_result.returncode,
                    0,
                    _format_clangd_error(clangd_result, db_path, command),
                )

    def test_headers_are_not_present(self):
        """Checks header files don't end up in the command databases."""
        matches = self._find_commands_for_file(
            _ANY_TEST_PACKAGE + r'.*\.h',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertEqual(
            len(matches),
            0,
            'Test headers should not end up in the database.',
        )

    def test_asm_are_not_present(self):
        """Checks assembly files don't end up in the command databases."""
        matches = self._find_commands_for_file(
            '(' + _TEST_PACKAGE + '|' + _EXTERNAL_PACKAGE + ')' + r'.*\.(s|S)',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertEqual(
            len(matches),
            0,
            'Assembly files should not end up in the database.',
        )

    def test_uncompiled_files_are_not_present(self):
        """Checks uncompiled files don't end up in the command databases."""
        matches = self._find_commands_for_file(
            _ANY_TEST_PACKAGE + r'.*uncompiled_cc_test\.cc',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertEqual(
            len(matches),
            0,
            'Uncompiled files should not end up in the database.',
        )

    def test_external_include_path_is_present(self):
        """Checks for an external repo's include path."""
        matches = self._find_commands_for_file(
            _EXTERNAL_PACKAGE + r'.*\.cc?',
            platform_pattern=_HOST_OR_DEVICE,
        )
        self.assertGreater(
            len(matches), 0, 'External test file command not found.'
        )

        expected_include_path = 'pw_cc_compile_commands_test_external'
        for db_path, command in matches:
            with self.subTest(
                f'Checking {command["file"]} from {db_path.parent}'
            ):
                self.assertTrue(
                    any(
                        arg.startswith('-I') and expected_include_path in arg
                        for arg in command['arguments']
                    ),
                    'External repo include path not found in compile commands.',
                )

    def test_no_virtual_includes(self):
        """Ensures no _virtual_includes paths are in the compile commands.

        Note: This assumes all test targets do not depend on any libraries
        where someone has elected to generate their own _virtual_includes
        library. While the aspect should respond correctly to those cases,
        it's harder to test them here without additional build graph metadata.
        """
        matches = self._find_commands_for_file(
            _ANY_TEST_PACKAGE + r'.*',
            platform_pattern=_HOST_OR_DEVICE,
        )
        for db_path, command in matches:
            with self.subTest(
                f'Checking {command["file"]} from {db_path.parent}'
            ):
                args = command['arguments']
                i = 0
                while i < len(args):
                    arg = args[i]
                    path_str = None
                    if arg in _INCLUDE_PREFIXES:
                        if i + 1 < len(args):
                            path_str = args[i + 1]
                            i += 1
                    elif arg.startswith('-I'):
                        path_str = arg[2:]

                    if path_str:
                        self.assertNotIn(
                            '_virtual_includes',
                            path_str,
                            f'Found _virtual_includes in {path_str}',
                        )
                    i += 1

    def test_include_paths_exist(self):
        """Ensures all include paths point to real dirs.

        This is quite complex due to bazel's behavior of generating a real
        and a bazel-out include for each `includes` or `quote_includes` entry.
        """
        matches = self._find_commands_for_file(
            _ANY_TEST_PACKAGE + r'.*',
            platform_pattern=_HOST_OR_DEVICE,
        )
        for db_path, command in matches:
            with self.subTest(
                f'Checking {command["file"]} from {db_path.parent}'
            ):
                # 1. Collect all include paths from the compile command.
                all_include_paths: list[Path] = []
                args = command['arguments']
                i = 0
                while i < len(args):
                    arg = args[i]
                    path_str = None

                    # Handles '-I foo' and '-isystem foo'
                    if arg in _INCLUDE_PREFIXES:
                        if i + 1 < len(args):
                            path_str = args[i + 1]
                            i += 1
                    # Handles '-Ifoo' and '-isystemfoo'
                    else:
                        for prefix in _INCLUDE_PREFIXES:
                            if arg.startswith(prefix):
                                path_str = arg[len(prefix) :]
                                break

                    if path_str:
                        include_path = Path(path_str)
                        all_include_paths.append(include_path)
                    i += 1

                # 2. Separate paths into "real" and "bazel-out" paths.
                real_paths: list[Path] = []
                bazel_bin_paths: list[dict] = []
                bin_dir_re = re.compile(
                    r'(?:^|(?:.*/))bazel-out/[^/]+/bin/(?P<suffix>.+)'
                )

                for path in all_include_paths:
                    match = bin_dir_re.match(str(path))
                    if match:
                        bazel_bin_paths.append(
                            {'path': path, 'suffix': match.group('suffix')}
                        )
                    else:
                        real_paths.append(path)

                    self.assertFalse(
                        str(path).startswith('bazel-out/'),
                        f'Relative, generated include path {path} was not '
                        'remapped to its absolute path by the merger',
                    )
                    self.assertFalse(
                        str(path).startswith('external/'),
                        f'Relative, external include path {path} was not '
                        'remapped to its absolute path by the merger',
                    )

                # For every `includes` and `quote_includes` path, Bazel
                # generates a second include in `bazel-bin` that contains any
                # files that were generated in the package. We don't necessarily
                # know which of these two include paths exist, but at least
                # *one* should.

                # 3. Check generated paths first, since it's easier to find the
                # real version from the parallel bazel-out path.
                for bin_path_info in bazel_bin_paths:
                    bin_path = bin_path_info['path']
                    suffix = bin_path_info['suffix']

                    self.assertTrue(
                        bin_path.is_absolute(),
                        f'bazel-bin path `{bin_path}` is generated, but was '
                        'not resolved to an absolute path by the merger',
                    )

                    # If path doesn't exist. Find a real path that has a
                    # matching suffix and check that.
                    if not bin_path.is_dir():
                        maybe_path: Path | None = None
                        execroot = str(bin_path).split('/execroot/', 1)[0]
                        for real_path in real_paths:
                            # Only match against project-relative paths, or
                            # external/ paths just below the execroot.
                            if re.match(
                                r'(?:^)|(?:' + execroot + r'/)' + suffix,
                                str(real_path),
                            ):
                                maybe_path = real_path
                        check_path = maybe_path
                        if not check_path.is_absolute():
                            check_path = Path(command['directory']) / check_path
                        self.assertIsNotNone(
                            check_path,
                            f'bazel-bin path `{bin_path}` does not exist and '
                            'no real in-tree alternative include path could be '
                            'found',
                        )
                        # To remove, need to use the not-resolved path.
                        real_paths.remove(maybe_path)

                # 4. For every remaining "real" path that we didn't find a
                # valid generated include directory, unconditionally require
                # the real path to exist.
                for path in real_paths:
                    realpath = path
                    if not path.is_absolute():
                        realpath = Path(command['directory']) / path
                    self.assertTrue(
                        realpath.is_dir(),
                        f'Real include path `{path}` does not exist or is not '
                        f'a directory. File: {command["file"]}',
                    )

    def test_files_present_in_host_config(self):
        """Checks that expected files are present for the host config."""
        self._assert_file_is_in_db_for_config(
            file_pattern=_TEST_PACKAGE + r'basic_source_test\.cc',
            platform_pattern=_HOST_PLATFORM,
        )
        self._assert_file_is_in_db_for_config(
            file_pattern=_TEST_PACKAGE + r'basic_source_virt_test\.cc',
            platform_pattern=_HOST_PLATFORM,
        )

    def test_files_present_in_device_config(self):
        """Checks that expected files are present for the device config."""
        self._assert_file_is_in_db_for_config(
            file_pattern=_TEST_PACKAGE + r'basic_source_test\.cc',
            platform_pattern=_DEVICE_PLATFORM,
        )
        self._assert_file_is_in_db_for_config(
            file_pattern=_TEST_PACKAGE + r'basic_source_virt_test\.cc',
            platform_pattern=_DEVICE_PLATFORM,
        )
        self._assert_file_is_in_db_for_config(
            file_pattern=_TEST_PACKAGE + r'basic_binary_test\.cc',
            platform_pattern=_DEVICE_PLATFORM,
        )

    def test_external_file_present_in_host_config(self):
        """Checks that an external file is present for the host config."""
        self._assert_file_is_in_db_for_config(
            file_pattern=_EXTERNAL_PACKAGE + r'external_source_test\.cc',
            platform_pattern=_HOST_PLATFORM,
        )

    @classmethod
    def tearDownClass(cls):
        cls.temp_dir.cleanup()


class CompileCommandsViaBuildTest(CompileCommandsTestBase):
    """Runs the shared tests on databases from a forwarded `bazel build`."""

    @classmethod
    def setUpClass(cls):
        """Runs the updater with a forwarded build, then loads its output.

        Raises:
            RuntimeError: If the updater exits with a non-zero status.
        """
        super().setUpClass()

        # The arguments after `--` are the forwarded `bazel build` call.
        updater_command = [
            cls.updater_path,
            f'--out-dir={cls.temp_dir.name}',
            '--',
            'build',
            _TEST_CPP_TARGET,
        ]
        completed = subprocess.run(
            updater_command,
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode:
            raise RuntimeError(
                f'update_compile_commands failed: {completed.stderr}'
            )

        cls._load_databases()


class CompileCommandsViaGlobTest(CompileCommandsTestBase):
    """Runs the shared tests on databases globbed from existing fragments."""

    @classmethod
    def setUpClass(cls):
        """Runs the updater with no forwarded args, then loads its output.

        Raises:
            RuntimeError: If the updater exits with a non-zero status.
        """
        super().setUpClass()

        # The fragments are a `data` dependency of this test, so they have
        # already been built. Omitting forwarded args selects the glob path.
        updater_command = [cls.updater_path, f'--out-dir={cls.temp_dir.name}']
        completed = subprocess.run(
            updater_command,
            capture_output=True,
            text=True,
            check=False,
        )
        if completed.returncode:
            raise RuntimeError(
                f'update_compile_commands failed: {completed.stderr}'
            )

        cls._load_databases()


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
